# If running scripts one after the other, then should remove some elements because names are reused
# rm(list=ls())
Sys.setlocale("LC_CTYPE", "EN_US.UTF-8")
setwd("~/Dropbox/Dissertation/Ideological_Labels/Replication")
library(foreign)
library(broom)
library(ggplot2)
library(stargazer)
options(scipen = 999)


## Read in Data
data <- read.dta("1993_Survey.dta")

## Create ideology variables

## Self Identified Ideology - 我们一般以"左"、"右"来区分人们的政治态度.[出示卡片]在这张卡片上, 1 代表最左;6 代表最右. 当您考虑到以下的个人或组织时, 您觉得他们的政治态度应该放在哪一栏? 

## 您自己

data$own.ideology <- data$v78a
data$own.ideology[data$v78a %in% 8:9] <- NA

## 中国共产党

data$ccp.ideology <- data$v78b
data$ccp.ideology[data$v78b %in% 8:9] <- NA

## 您父亲

data$father.ideology <- data$v78c
data$father.ideology[data$v78c %in% 8:9] <- NA

## 国民党

data$kmt.ideology <- data$v78d
data$kmt.ideology[data$v78d %in% 8:9] <- NA

## Code survey questions

## 一个国家如果政党太多,就会导致混乱

data$pol.parties[data$v43j == "strongly agree "] <- 4 
data$pol.parties[data$v43j == "agree"] <- 3
data$pol.parties[data$v43j == "disagree"] <- 2
data$pol.parties[data$v43j == "strongly disagree "] <- 1
data$pol.parties[data$v43j == "don't know"] <- NA
data$pol.parties[data$v43j == "no answer"] <- NA

## 一种思潮该不该在群众中流传,应由政府决定

data$info.control[data$v43k == "strongly agree "] <- 4 
data$info.control[data$v43k == "agree"] <- 3
data$info.control[data$v43k == "disagree"] <- 2
data$info.control[data$v43k == "strongly disagree "] <- 1
data$info.control[data$v43k == "don't know"] <- NA
data$info.control[data$v43k == "no answer"] <- NA

## 目前在我国进一步扩大民主就会影响稳定

data$democracy.unstable[data$v43n == "strongly agree "] <- 4 
data$democracy.unstable[data$v43n == "agree"] <- 3
data$democracy.unstable[data$v43n == "disagree"] <- 2
data$democracy.unstable[data$v43n == "strongly disagree "] <- 1
data$democracy.unstable[data$v43n == "don't know"] <- NA
data$democracy.unstable[data$v43n == "no answer"] <- NA

## 老百姓对政府的决定应该不计个人得失,全力支持

data$uncond.support[data$v43p == "strongly agree "] <- 4 
data$uncond.support[data$v43p == "agree"] <- 3
data$uncond.support[data$v43p == "disagree"] <- 2
data$uncond.support[data$v43p == "strongly disagree "] <- 1
data$uncond.support[data$v43p == "don't know"] <- NA
data$uncond.support[data$v43p == "no answer"] <- NA

## 尽管我们国家的政治制度有这样那样的缺点, 但它还是最适合我国的国情

data$pol.system[data$v61d == "strongly agree"] <- 4 
data$pol.system[data$v61d == "agree"] <- 3
data$pol.system[data$v61d == "disagree"] <- 2
data$pol.system[data$v61d == "strongly disagree"] <- 1
data$pol.system[data$v61d == "don't know"] <- NA
data$pol.system[data$v61d == "no answer"] <- NA

## 政府的高级官员就象大家庭的家长,他们关于国家事务的决定,人民都应该服从

data$family.gov[data$v61l == "strongly agree"] <- 4 
data$family.gov[data$v61l == "agree"] <- 3
data$family.gov[data$v61l == "disagree"] <- 2
data$family.gov[data$v61l == "strongly disagree"] <- 1
data$family.gov[data$v61l == "don't know"] <- NA
data$family.gov[data$v61l == "no answer"] <- NA

## 我国的政治制度是世界上最好的

data$best.system[data$v61q == "strongly agree"] <- 4 
data$best.system[data$v61q == "agree"] <- 3
data$best.system[data$v61q == "disagree"] <- 2
data$best.system[data$v61q == "strongly disagree"] <- 1
data$best.system[data$v61q == "don't know"] <- NA
data$best.system[data$v61q == "no answer"] <- NA

## 国家就象一部完整的大机器,个人不过是其中的一颗螺丝钉,谈不上有独立的地位

data$cog.machine[data$v61r == "strongly agree"] <- 4 
data$cog.machine[data$v61r == "agree"] <- 3
data$cog.machine[data$v61r == "disagree"] <- 2
data$cog.machine[data$v61r == "strongly disagree"] <- 1
data$cog.machine[data$v61r == "don't know"] <- NA
data$cog.machine[data$v61r == "no answer"] <- NA


## 有人认为我国近年来政治改革速度太快,也有人认为太慢,还有人认为正好. 您的意见是什么?

data$pol.reform.too.fast[data$v79 %in% c("too fast")] <- 1 
data$pol.reform.too.fast[data$v79 %in% c("too slow")] <- -1 
data$pol.reform.too.fast[data$v79 %in% c("just right")] <- 0
data$pol.reform.too.fast[data$v79 %in% c("don't know")] <- NA 

## 有人认为近年来我国社会变化的速度太快,有人说太慢,还有人认为正好.您的意见是什么?

data$social.change.too.fast[data$v80 %in% c("too fast")] <- 1 
data$social.change.too.fast[data$v80 %in% c("too slow")] <- -1 
data$social.change.too.fast[data$v80 %in% c("just right")] <- 0
data$social.change.too.fast[data$v80 %in% c("don't know")] <- NA 

## 国家是一个大家庭,少数民族也不应该要求独立

data$anti.ethnic.indep[data$v81k == "strongly agree"] <- 4 
data$anti.ethnic.indep[data$v81k == "agree"] <- 3
data$anti.ethnic.indep[data$v81k == "disagree"] <- 2
data$anti.ethnic.indep[data$v81k == "strongly disagree"] <- 1
data$anti.ethnic.indep[data$v81k == "don't know"] <- NA
data$anti.ethnic.indep[data$v81k == "no answer"] <- NA

## 有人认为,对个人的合法收入不论多少, 都不应该加以限制; 也有人认为,对少数收入特别高的给予一定限制是必要的; 
# 对个人收入不应该有限制
# 对收入特别高的, 有一定限制是必要的.

data$restrict.income[data$v94 == "should restrict"] <- 1 
data$restrict.income[data$v94 == "should not restrict"] <- 0 
data$restrict.income[data$v94 == "don't know"] <- NA
data$restrict.income[data$v94 == "no answer"] <- NA

## 有人认为目前我国私营企业的发展已经动摇了公有制经济基础, 应该加以限制; 也有人认为私营 企业的发展有利于国民经济, 不应该加以限制; 您的意见呢?
# 不应该限制私营企业的发展
# 应该限制私营企业的发展

data$restrict.enterprise[data$v95 == "should restrict"] <- 1 
data$restrict.enterprise[data$v95 == "should not restrict"] <- 0 
data$restrict.enterprise[data$v95 == "don't know"] <- NA
data$restrict.enterprise[data$v95 == "no answer"] <- NA

## 有人认为, 我国不仅要进行经济改革, 而且应加速政治改革; 也有人认为, 政治改革会带来不稳定, 目前不宜进行; 您的意见是什么?
# 目前不宜进行政治改革
# 应该加速政治改革

data$no.polreform[data$v96 == "no political reform"] <- 1
data$no.polreform[data$v96 == "speed political reform"] <- 0 
data$no.polreform[data$v96 == "don't know"] <- NA
data$no.polreform[data$v96 == "no answer"] <- NA

##  如果台湾宣布独立,您认为我国政府应不应该使用武力,使其回归祖国?
# 即使台湾宣布独立,也不应该使用武力.
# 应该先使用和平方式,劝其放弃独立,万不得已时可以使用武力.
# 只要其宣布独立,就应该使用武力,使其回归祖国.

data$taiwan.force[data$v97 == "use force"] <- 1 
data$taiwan.force[data$v97 == "maybe force"] <- 0 
data$taiwan.force[data$v97 == "no force"] <- -1 
data$taiwan.force[data$v97 == "don't know"] <- NA

## 有人认为 "四人帮" 的主张都是反动的, 要彻底肃清; 也有人认为他们提倡的某些东西有一定的合理 因素, 不应全盘否定, 您怎么看?
# 要彻底肃清
# 不应全盘否定

data$abolish.gang[data$v106 == "abolish"] <- 1 
data$abolish.gang[data$v106 == "not  negate"] <- 0 
data$abolish.gang[data$v106 == "don't know"] <- NA
data$abolish.gang[data$v106 == "not applicable"] <- NA
data$abolish.gang[data$v106 == "no answer"] <- NA

## 有些人思想倾向 "极左",有些人 "极右",从目前情况看,您认为 "极左" 还是 "极右"对社会的危害更大?
# "极左"
# "极右"

data$extreme.ideology[data$v107 == "extreme left"] <- -1 
data$extreme.ideology[data$v107 == "extreme right"] <- 1 
data$extreme.ideology[data$v107 == "both harmful"] <- 0
data$extreme.ideology[data$v107 == "don't know"] <- NA
data$extreme.ideology[data$v107 == "no answer"] <- NA

## 如果持__________ [按上一题的选择提问, 如被访者选"极左"则问: "极左",如被访者选"极右"则问 "极 右",如被访者选 3、8、9,则问 "极左或极右"] 观点的人想
# 在大会上发言表达这种思想, 您认为应该允许吗?

data$censor.speech[data$v108a == "permit"] <- 1 
data$censor.speech[data$v108a == "not permit"] <- 0
data$censor.speech[data$v108a == "don't know"] <- NA
data$censor.speech[data$v108a == "no answer"] <- NA

# 在学校教书表达这种思想, 您认为应该允许吗?

data$censor.teacher[data$v108b == "permit"] <- 1 
data$censor.teacher[data$v108b == "not permit"] <- 0
data$censor.teacher[data$v108b == "don't know"] <- NA
data$censor.teacher[data$v108b == "no answer"] <- NA

# 写文章或出书表达这种思想, 您认为应该允许吗?

data$censor.publication[data$v108c == "permit"] <- 1 
data$censor.publication[data$v108c == "not permit"] <- 0
data$censor.publication[data$v108c == "don't know"] <- NA
data$censor.publication[data$v108c == "no answer"] <- NA

# 您喜不喜欢下面这些国家?
# 俄罗斯

data$like.russia[data$v110a == "like very much"] <- 4
data$like.russia[data$v110a == "like"] <- 3
data$like.russia[data$v110a == "dislike"] <- 2
data$like.russia[data$v110a == "dislike very much"] <- 1
data$like.russia[data$v110a == "don't know"] <- NA

# 美国

data$like.usa[data$v110b == "like very much"] <- 4
data$like.usa[data$v110b == "like"] <- 3
data$like.usa[data$v110b == "dislike"] <- 2
data$like.usa[data$v110b == "dislike very much"] <- 1
data$like.usa[data$v110b == "don't know"] <- NA


# 日本

data$like.japan[data$v110c == "like very much"] <- 4
data$like.japan[data$v110c == "like"] <- 3
data$like.japan[data$v110c == "dislike"] <- 2
data$like.japan[data$v110c == "dislike very much"] <- 1
data$like.japan[data$v110c == "don't know"] <- NA


# 德国

data$like.germany[data$v110d == "like very much"] <- 4
data$like.germany[data$v110d == "like"] <- 3
data$like.germany[data$v110d == "dislike"] <- 2
data$like.germany[data$v110d == "dislike very much"] <- 1
data$like.germany[data$v110d == "don't know"] <- NA

## 只要有了品格高尚的领导人,任何事情都可以交给他们办

data$delegate.worthy[data$v111n == "strongly agree"] <- 4
data$delegate.worthy[data$v111n == "agree"] <- 3
data$delegate.worthy[data$v111n == "disagree"] <- 2
data$delegate.worthy[data$v111n == "strongly disagree"] <- 1
data$delegate.worthy[data$v111n == "don't know"] <- NA
data$delegate.worthy[data$v111n == "no answer"] <- NA

## Political Knowledge

## US President
data$pol.know1[data$v19a == "clinton"] <- 1
data$pol.know1[data$v19a != "clinton"] <- 0

## Russian President + typo in coding

data$pol.know2[data$v19b == "yelstin"] <- 1
data$pol.know2[data$v19b != "yelstin"] <- 0

## Chinese Prime Minister (Premier)

data$pol.know3[data$v19c == "li peng"] <- 1
data$pol.know3[data$v19c != "li peng"] <- 0

## Chairman of NPC

data$pol.know4[data$v19d == "qiao shi"] <- 1
data$pol.know4[data$v19d != "qiao shi"] <- 0

## China General Secretary

data$pol.know5[data$v19e == "jiang zemin"] <- 1
data$pol.know5[data$v19e != "jiang zemin"] <- 0

## Taiwan authorities highest leader

data$pol.know6[data$v19f == "lee teng-hui"] <- 1
data$pol.know6[data$v19f != "lee teng-hui"] <- 0

## Total number correct

data$pol.knowsum <- rowSums(cbind(data$pol.know1, data$pol.know2, data$pol.know3, data$pol.know4, data$pol.know5, data$pol.know6))


################################################################################################
################################## Recoding Demographics #######################################
################################################################################################

## Age

data$years <- 1993 - data$birthyear

## Gender

data$female[data$sex == 2] <- 1
data$female[data$sex == 1] <- 0

## CCP Member = ccp

## Education = edulevel; 1 = less than elementary school (小学毕业 or below), 2 = middle school (初中毕业), 3 = high school (高中,中专,中技毕业), 4 = college and above (夜大,职大,电大,函大毕业或通过成人自学考试, OR 全日制大专,大学毕业, OR 研究生毕业)

## Income = v161-v165 or incyear BUT see below for step to recode if all answers = DK from 0 to NA. Note - incyear is not the sum of ruralincyear and urbanincyear because bonus income seems to be treated as both rural and urban income

## recoding rural agricultural income

data$rural.incAgr <- data$v161
data$rural.incAgr[data$v161 %in% c(99997:99999)] <- NA

## recoding rural non-agricultural income

data$rural.nonAgrInc <- data$v162
data$rural.nonAgrInc[data$v162 %in% c(99997:99999)] <- NA

## recoding rural sideline income

data$rural.incFuye <- data$v163
data$rural.incFuye[data$v163 %in% c(99997:99999)] <- NA

## recoding urban monthly income

data$urban.incMonth <- data$v164
data$urban.incMonth[data$v164 %in% c(9997:9999)] <- NA

## recoding bonus income

data$inc.bonus <- data$v165
data$inc.bonus[data$v165 %in% c(9997:9999)] <- NA

## recoding incyear variable to include NAs - Note - may want to recheck - seems low.

data$inc.year <- data$incyear
data$inc.year[(is.na(data$rural.incAgr) & is.na(data$rural.nonAgrInc) & is.na(data$rural.incFuye & is.na(data$urban.incMonth) & is.na(data$inc.bonus)))] <- NA

## Recoding trust in various institutions

# [出示卡片]请您参考这张卡片.这张卡片上有6个等级.6表示非常好,1表示非常不好.请您从6个等级中选择
# 一个, 表示您 对下述机构的印象.

# 法院
data$trust.courts <- data$v58a
data$trust.courts[data$v58a %in% 8:9] <- NA

# 解放军
data$trust.pla <- data$v58b
data$trust.pla[data$v58b %in% 8:9] <- NA

# 公安部门
data$trust.police <- data$v58c
data$trust.police[data$v58c %in% 8:9] <- NA

#  广播、电视、报纸等新闻机构
data$trust.media <- data$v58d
data$trust.media[data$v58d %in% 8:9] <- NA

# 地方人大

data$trust.lpc <- data$v58e
data$trust.lpc[data$v58e %in% 8:9] <- NA

# 全国人大

data$trust.npc <- data$v58f
data$trust.npc[data$v58f %in% 8:9] <- NA

# 一般政府官员

data$trust.officials <- data$v58g
data$trust.officials[data$v58g %in% 8:9] <- NA



summary(lm(pol.reform.too.fast ~ own.ideology, data = data))
summary(lm(pol.reform.too.fast ~ ccp.ideology, data = data))
summary(lm(own.ideology ~ ccp.ideology, data = data))
summary(lm(own.ideology ~ father.ideology, data = data))



################################################################################################
############################### Analysis of Response vs. Nonresponse ###########################
################################################################################################

# Did they answer the own ideology identification question?

data$own.response[!is.na(data$own.ideology)] <- 1
data$own.response[is.na(data$own.ideology)] <- 0

# Did they answer the CCP ideology identification question?

data$ccp.response[!is.na(data$ccp.ideology)] <- 1
data$ccp.response[is.na(data$ccp.ideology)] <- 0

# Did they answer the father's ideology question?

data$father.response[!is.na(data$father.ideology)] <- 1
data$father.response[is.na(data$father.ideology)] <- 0

# Did they answer the KMT's ideology question?

data$kmt.response[!is.na(data$kmt.ideology)] <- 1
data$kmt.response[is.na(data$kmt.ideology)] <- 0


# Regression Analysis

summary(lm(own.response ~ years + female + edulevel + inc.year + urban + ccp + pol.knowsum, data = data))

################################################################################################
#################################### Historical Variables ######################################
################################################################################################

# 您曾经受过政治迫害,或被定为有政治历史问题吗?

data$hist.problem[data$v174 == "yes"] <- 1
data$hist.problem[data$v174 == "no"] <- 0
data$hist.problem[data$v174 == "don't know"] <- NA
data$hist.problem[data$v174 == "not applicable"] <- NA
data$hist.problem[data$v174 == "no answer"] <- NA

# 您家有人曾经受过政治迫害,或被定为有政治历史问题吗?

data$fam.hist.problem[data$v175 == "yes"] <- 1
data$fam.hist.problem[data$v175 == "no"] <- 0
data$fam.hist.problem[data$v175 == "don't know"] <- NA
data$fam.hist.problem[data$v175 == "not applicable"] <- NA
data$fam.hist.problem[data$v175 == "no answer"] <- NA

# 文化大革命时期,您参加过红卫兵或其他革命群众组织吗?

data$red.guard[data$v176 == "yes"] <- 1
data$red.guard[data$v176 == "no"] <- 0
data$red.guard[data$v176 == "don't know"] <- NA
data$red.guard[data$v176 == "not applicable"] <- NA
data$red.guard[data$v176 == "no answer"] <- NA


################################################################################################
###################################### Issue Priorities ########################################
################################################################################################

#  目前我国面临许多问题,在您看来,哪个问题非常重要, 哪个问题比较重要, 哪个问题不太重要,哪个问题
#  不重要? 

# 保护环境

data$enviro.priority[data$v101a == "very important"] <- 4
data$enviro.priority[data$v101a == "relatively important"] <- 3
data$enviro.priority[data$v101a == "not too important"] <- 2
data$enviro.priority[data$v101a == "not important"] <- 1
data$enviro.priority[data$v101a == "don't know"] <- NA

# 巩固国防

data$defense.priority[data$v101b == "very important"] <- 4
data$defense.priority[data$v101b == "relatively important"] <- 3
data$defense.priority[data$v101b == "not too important"] <- 2
data$defense.priority[data$v101b == "not important"] <- 1
data$defense.priority[data$v101b == "don't know"] <- NA

# 解决贫富不均问题

data$inequality.priority[data$v101c == "very important"] <- 4
data$inequality.priority[data$v101c == "relatively important"] <- 3
data$inequality.priority[data$v101c == "not too important"] <- 2
data$inequality.priority[data$v101c == "not important"] <- 1
data$inequality.priority[data$v101c == "don't know"] <- NA

# 保护消费者权益 

data$consumer.priority[data$v101d == "very important"] <- 4
data$consumer.priority[data$v101d == "relatively important"] <- 3
data$consumer.priority[data$v101d == "not too important"] <- 2
data$consumer.priority[data$v101d == "not important"] <- 1
data$consumer.priority[data$v101d == "don't know"] <- NA

# 打击犯罪

data$crime.priority[data$v101e == "very important"] <- 4
data$crime.priority[data$v101e == "relatively important"] <- 3
data$crime.priority[data$v101e == "not too important"] <- 2
data$crime.priority[data$v101e == "not important"] <- 1
data$crime.priority[data$v101e == "don't know"] <- NA

# 控制物价

data$price.control.priority[data$v101f == "very important"] <- 4
data$price.control.priority[data$v101f == "relatively important"] <- 3
data$price.control.priority[data$v101f == "not too important"] <- 2
data$price.control.priority[data$v101f == "not important"] <- 1
data$price.control.priority[data$v101f == "don't know"] <- NA

# 鼓励发展个体或私营企业

data$enterprise.priority[data$v101g == "very important"] <- 4
data$enterprise.priority[data$v101g == "relatively important"] <- 3
data$enterprise.priority[data$v101g == "not too important"] <- 2
data$enterprise.priority[data$v101g == "not important"] <- 1
data$enterprise.priority[data$v101g == "don't know"] <- NA

# 计划生育

data$pop.control.priority[data$v101h == "very important"] <- 4
data$pop.control.priority[data$v101h == "relatively important"] <- 3
data$pop.control.priority[data$v101h == "not too important"] <- 2
data$pop.control.priority[data$v101h == "not important"] <- 1
data$pop.control.priority[data$v101h == "don't know"] <- NA

# 住房问题

data$housing.priority[data$v101i == "very important"] <- 4
data$housing.priority[data$v101i == "relatively important"] <- 3
data$housing.priority[data$v101i == "not too important"] <- 2
data$housing.priority[data$v101i == "not important"] <- 1
data$housing.priority[data$v101i == "don't know"] <- NA

# 对外开放

data$world.opening.priority[data$v101j == "very important"] <- 4
data$world.opening.priority[data$v101j == "relatively important"] <- 3
data$world.opening.priority[data$v101j == "not too important"] <- 2
data$world.opening.priority[data$v101j == "not important"] <- 1
data$world.opening.priority[data$v101j == "don't know"] <- NA

################################################################################################
################################## Making Political Knowledge IRT ##############################
################################################################################################

## reset seed 

library(pscl)
library(emIRT)


set.seed(123)
knowledgeQuestions <- names(data[,which(colnames(data) %in% c("pol.know1", "pol.know2", "pol.know3", "pol.know4", "pol.know5", "pol.know6"))])

RC.list <- data[,which(colnames(data) %in% knowledgeQuestions)]
RC <- matrix(unlist(RC.list), nrow = nrow(data), ncol = 6, byrow = FALSE)
colnames(RC) <- knowledgeQuestions

head(RC)
head(RC.list)


# ## Making a rollcall object using pscl
rc.irt <- rollcall(RC, yea = 1, nay = 0, missing = NA, vote.names = knowledgeQuestions)
# 
summary(rc.irt, verbose = TRUE)
# 
# ## Making RC object from roll call
# 
MyRC <- convertRC(rc.irt, type = "binIRT")
# 

## Making priors and starts

RCpriors <- makePriors(MyRC$n, MyRC$m, 1)
RCstarts <- getStarts(MyRC$n, MyRC$m, 1)

## Running the binIRT model using Expectation Maximization from the emIRT package
# # 
lout <- binIRT(.rc = MyRC,
               .starts = RCstarts,
               .priors = RCpriors,
               .control = {
                 list(threads = 1,
                      verbose = TRUE,
                      thresh = 1e-6
                 )
               }
)

save(lout, file = "LeftRightKnowledgeIRT.Rda")



### Note: Check beta_1 discrimination parameters - sometimes beta_1 signs are all negative. If that's the case, then we need to multiply our ideal points by -1. With this seed, that should not be necessary. 

lout$means$beta

## Making new column of mean ideal points

irt.knowledge <- as.vector(lout$means$x) 
head(irt.knowledge)

data$irt.knowledge <- irt.knowledge




## Rescaling Knowledge Measure to have mean 0 and std dev 1 ##

data$irt.dge <- scale(data$irt.knowledge, center = TRUE, scale = TRUE)
summary(data$irt.knowledge)

bottomThird <- quantile(data$irt.knowledge, c(0.333))
middleThird <- quantile(data$irt.knowledge, c(0.666))


quantile(data$irt.knowledge, c(.05, .10, .25, .3, .33, .35, .5, .6, .66, .69, .7, .75, .9, .95), na.rm = TRUE)


### Make our politcal knowledge variable

data$wordInformation[data$irt.knowledge < bottomThird ] <- "Low"
data$wordInformation[data$irt.knowledge >= bottomThird & data$irt.knowledge <= middleThird ] <- "Medium"
data$wordInformation[data$irt.knowledge > middleThird ] <- "High"

data$wordInformation <- factor(data$wordInformation, levels = c("Low", "Medium", "High"))
table(data$wordInformation)

################################################################################################
#################### Regression Analysis of Left-Right Placement  ##############################
################################################################################################

library(psych)

predictorsList <- names(data[,c(490:496, 498:504,514)])

bivariateLMs <- lapply(predictorsList, function(x){
  lm(substitute(own.ideology ~ i, list(i = as.name(x))), data = data)
})
# lapply(bivariateLMs, summary)

Reduce(rbind, lapply(bivariateLMs, function(lm) tidy(lm)[2,c("term", "estimate", "std.error", "statistic")]))

summary(lm(own.ideology ~ ccp + female + edulevel + log(1 + inc.year), data = data))

################################################################################################
#################### Correlation Analysis of Left-Right Placement  #############################
################################################################################################

correlations <- Reduce(rbind, lapply(predictorsList, function(x){
  tidy(cor.test(data$own.ideology, data[[x]], method = "pearson"))[c("estimate", "conf.low", "conf.high", "statistic", "parameter")]
}))

# Create vector of longer question labels

# questionLabels <- c("Too many political parties\nproduce chaos", "Government should control\nspread of information", "Broadening democracy will\naffect stability", "People should unconditionally\nsupport the government", "Our government fits\nChina's circumstances", "The government is like a \nfamily, should be obeyed", "China's political system is\nthe best in the world", "The pace of political reform\nis too fast", "The pace of social change\nis too fast", "Ethnic minorities should not\nask for independence", "Government should restrict\nindividual income", "The development of private\nenterprise should be restricted", "China should not carry out\npolitical reform", "Use force if Taiwan\ndeclares independence", "Can leave all decisions\nto morally upright leaders")

questionLabels <- c("Too many political parties produce chaos", "Government should control spread of information", "Broadening democracy will affect stability", "Should unconditionally support the government", "Our government fits China's circumstances", "The government is like a family, should be obeyed", "China's political system is the best in the world", "The pace of political reform is too fast", "The pace of social change is too fast", "Ethnic minorities should not ask for independence", "Government should restrict individual income", "Restrict development of private enterprise", "China should not carry out political reform", "Use force if Taiwan declares independence", "Can leave all decisions to morally upright leaders")


# Create vector of issue types

issueType <- c("Political\nIssues", "Political\nIssues", "Political\nIssues","Political\nIssues","Political\nIssues","Political\nIssues","Political\nIssues","Political\nIssues","Social\nIssues","Political\nIssues","Economic\nIssues","Economic\nIssues","Political\nIssues","Political\nIssues","Social\nIssues")

bivariateCorrs <- cbind(predictorsList, correlations, questionLabels, issueType)


# ordered by estimate

orderedCorrs <- bivariateCorrs[order(bivariateCorrs$estimate), ]

# rename columns of names to orderedPredictors

names(orderedCorrs)[names(orderedCorrs) == 'predictorsList'] <- 'orderedPredictors'

# make the questionLabels an ordered factor for ggplot

orderedCorrs$questionLabels <- factor(orderedCorrs$questionLabels, levels = orderedCorrs$questionLabels[order(orderedCorrs$estimate)])


credplot.gg <- function(d){
  # d is a data frame with 4 columns
  # d$x gives variable names
  # d$y gives center point
  # d$ylo gives lower limits
  # d$yhi gives upper limits
  require(ggplot2)
  p <- ggplot(d, aes(y=questionLabels, x=estimate, xmin=conf.low, xmax=conf.high)) +
    geom_point(size = 2.5) +
    geom_errorbarh(height = 0) +
    geom_vline(xintercept = 0, linetype=2) +
    scale_x_continuous(breaks = c(-0.15, -0.1, -0.05, 0, 0.05, 0.1), limits = c(-0.157, 0.131)) +
    xlab('Correlation with Left-Right Self-Identification') + 
    ylab('') +
    facet_grid(issueType ~ ., scales = "free", space = "free") +
    theme_bw() + 
    theme(strip.text.y = element_text(angle = 0), text = element_text(size=12))
  return(p)
}

pdf("1993_L-R_Correlations.pdf", width = 8.5, height = 6)
credplot.gg(orderedCorrs)
dev.off()



################################################################################################
############## Figure 1a), 1b), and 1c): Graphic Representation of Responses ###################
################################################################################################


library(scales)


barplot <- ggplot(data, aes(x = own.ideology)) +
  geom_bar(fill = "slategray2") +
  scale_x_continuous(breaks = pretty_breaks(n = 6), limits = c(0.5,6.5)) +
  xlab("Self-Placement on the Left-Right Scale") +
  ylab("Number of Respondents") +
  theme_bw() +
  annotate("text", label = "Nonresponse Rate:\n 37.8%", x = 5.65, y = 750, size = 5.8) +
  theme(text = element_text(size=16))

## Figure 1a
pdf("1993_barplot.pdf", width = 9, height = 6)
barplot
dev.off()

barplot.kmt <- ggplot(data, aes(x = kmt.ideology)) +
  geom_bar(fill="slategray2") +
  scale_x_continuous(breaks = pretty_breaks(n = 6), limits = c(0.5,6.5)) +
  xlab("KMT Placement on the Left-Right Scale") +
  ylab("Number of Respondents") +
  theme_bw() +
  annotate("text", label = "Nonresponse Rate:\n 61.9%", x = 1.35, y = 650, size = 5.8) +
  theme(text = element_text(size=16)) 

## Figure 1b
pdf("1993_kmt_barplot.pdf", width = 9, height = 6)
barplot.kmt
dev.off()

barplot.ccp <- ggplot(data, aes(x = ccp.ideology)) +
  geom_bar(fill="slategray2") +
  scale_x_continuous(breaks = pretty_breaks(n = 6), limits = c(0.5,6.5)) +
  xlab("CCP Placement on the Left-Right Scale") +
  ylab("Number of Respondents") +
  theme_bw() +
  annotate("text", label = "Nonresponse Rate:\n 42.3%", x = 5.65, y = 450, size = 5.8) +
  theme(text = element_text(size=16)) 

## Figure 1c
pdf("1993_ccp_barplot.pdf", width = 9, height = 6)
barplot.ccp
dev.off()


################################################################################################
####################### Subgroup Analysis: High Political Knowledge  ###########################
################################################################################################

correlations.highinfo <- Reduce(rbind, lapply(predictorsList, function(x){
  tidy(cor.test(data[data$wordInformation == "High", ]$own.ideology, data[data$wordInformation == "High", ][[x]], method = "pearson"))[c("estimate", "conf.low", "conf.high", "statistic", "parameter")]
}))

bivariateCorrs.hi <- cbind(predictorsList, correlations.highinfo, questionLabels, issueType)


# ordered by estimate

orderedCorrs.hi <- bivariateCorrs.hi[order(bivariateCorrs.hi$estimate), ]

# rename columns of names to orderedPredictors

names(orderedCorrs.hi)[names(orderedCorrs.hi) == 'predictorsList'] <- 'orderedPredictors'

# make the questionLabels an ordered factor for ggplot

orderedCorrs.hi$questionLabels <- factor(orderedCorrs.hi$questionLabels, levels = orderedCorrs.hi$questionLabels[order(orderedCorrs.hi$estimate)])


credplot.gg <- function(d){
  # d is a data frame with 4 columns
  # d$x gives variable names
  # d$y gives center point
  # d$ylo gives lower limits
  # d$yhi gives upper limits
  require(ggplot2)
  p <- ggplot(d, aes(y=questionLabels, x=estimate, xmin=conf.low, xmax=conf.high)) +
    geom_point() +
    geom_errorbarh(height = 0) +
    geom_vline(xintercept = 0, linetype=2) +
    scale_x_continuous(breaks = c(-0.15, -0.1, -0.05, 0, 0.05, 0.1, 0.15), limits = c(-0.195, 0.195)) +
    xlab('Correlation with Left-Right Self-Identification') + 
    ylab('') +
    facet_grid(issueType ~ ., scales = "free", space = "free") +
    theme_bw() + 
    theme(strip.text.y = element_text(angle = 0), text = element_text(size=12))
  return(p)
}

pdf("1993_L-R_Correlations_highinfo.pdf", width = 8.5, height = 7)
credplot.gg(orderedCorrs.hi)
dev.off()


################################################################################################
################################### Multiple Measures Analysis  ################################
################################################################################################

# Scale all answers to the 15 questions so the answers have mean = 0 and sd = 1.
# NOTE CODE IS NOW CHANGED TO NOT BE SCALED
reduced_data = data[,which(colnames(data)%in%predictorsList)]

# Create index variable for economic questions
reduced_data$econ.index <- rowMeans(reduced_data[,which(colnames(reduced_data)%in%c("restrict.enterprise", "restrict.income"))], na.rm = TRUE)

# Create index variable for economic questions
reduced_data$social.index <- rowMeans(reduced_data[,which(colnames(reduced_data)%in%c("social.change.too.fast", "delegate.worthy"))], na.rm = TRUE)

# Create index variable for political questions
reduced_data$political.index <- rowMeans(reduced_data[,!(colnames(reduced_data) %in% c("social.change.too.fast", "delegate.worthy", "restrict.enterprise", "restrict.income"))], na.rm = TRUE)

# Create index variable for all questions
reduced_data$policy.index <- rowMeans(reduced_data[,1:15], na.rm = TRUE)

# Add back ideology and demographic variables
reduced_data <- cbind(reduced_data, data$own.ideology, data$ccp.ideology, data$father.ideology, data$kmt.ideology, data$ccp, data$years, data$female, data$edulevel, data$inc.year)

# Restore names

colnames(reduced_data)[colnames(reduced_data) == 'data$own.ideology'] <- 'own.ideology'
colnames(reduced_data)[colnames(reduced_data) == 'data$ccp.ideology'] <- 'ccp.ideology'
colnames(reduced_data)[colnames(reduced_data) == 'data$father.ideology'] <- 'father.ideology'
colnames(reduced_data)[colnames(reduced_data) == 'data$kmt.ideology'] <- 'kmt.ideology'
colnames(reduced_data)[colnames(reduced_data) == 'data$ccp'] <- 'ccp'
colnames(reduced_data)[colnames(reduced_data) == 'data$years'] <- 'years'
colnames(reduced_data)[colnames(reduced_data) == 'data$female'] <- 'female'
colnames(reduced_data)[colnames(reduced_data) == 'data$edulevel'] <- 'edulevel'
colnames(reduced_data)[colnames(reduced_data) == 'data$inc.year'] <- 'inc.year'


# Examine Correlation

cor.test(reduced_data$own.ideology, reduced_data$econ.index)
cor.test(reduced_data$own.ideology, reduced_data$political.index)
cor.test(reduced_data$own.ideology, reduced_data$social.index)

# Correlation Matrix between sixteen questions
library(Hmisc)
inter.corr <- rcorr(as.matrix(reduced_data[,1:15]))


# Function to simplify into data frame
flattenCorrMatrix <- function(cormat, pmat) {
  ut <- upper.tri(cormat)
  data.frame(
    row = rownames(cormat)[row(cormat)[ut]],
    column = rownames(cormat)[col(cormat)[ut]],
    cor  =(cormat)[ut],
    p = pmat[ut]
  )
}

flattenCorrMatrix(inter.corr$r, inter.corr$P)

# Calculate average inter-item correlation

mean(flattenCorrMatrix(inter.corr$r, inter.corr$P)$cor)

# Examine regression if we consider both policy index and perceived ideology of CCP, KMT, and Father

summary(lm(own.ideology ~ econ.index + political.index + social.index + ccp + years + female + edulevel + inc.year + ccp.ideology, data = reduced_data))




################################################################################################
######## Top panel of Figure 8: Correlation Analysis of Partisan and Symbolic Issues  ##########
################################################################################################

## Some code is repeated with some variations in this section and the next to produce the equivalence 
## analysis, which adds a dashed vertical line that denotes a negligible effect.
 

## Create list of partisan and symbolic variables

symbolicList <- names(data[,c(441, 487, 489, 505, 511, 512)])

correlations.sym <- Reduce(rbind, lapply(symbolicList, function(x){
  tidy(cor.test(data$own.ideology, data[[x]], method = "pearson"))[c("estimate", "conf.low", "conf.high", "statistic", "parameter")]
}))

# Create vector of longer question labels


symLabels <- c("Communist Party Membership", "CCP Left-Right Placement", "KMT Left-Right Placement", "Gang of Four should be thoroughly abolished", "Like the United States", "Like Japan")


# Create vector of issue types

issueType <- c("1993\nSurvey","1993\nSurvey","1993\nSurvey","1993\nSurvey","1993\nSurvey","1993\nSurvey")

bivariateCorrs.sym <- cbind(symbolicList, correlations.sym, symLabels, issueType)


# ordered by estimate

orderedCorrs.sym <- bivariateCorrs.sym[order(bivariateCorrs.sym$estimate), ]

# rename columns of names to orderedPredictors

names(orderedCorrs.sym)[names(orderedCorrs.sym) == 'symbolicList'] <- 'orderedPredictors'

# make the symLabels an ordered factor for ggplot

orderedCorrs.sym$symLabels <- factor(orderedCorrs.sym$symLabels, levels = orderedCorrs.sym$symLabels[order(orderedCorrs.sym$estimate)])

# Save our file so we can combine later
save(orderedCorrs.sym, file = "symCorrs.Rda")


credplot.gg <- function(d){
  # d is a data frame with 4 columns
  # d$x gives variable names
  # d$y gives center point
  # d$ylo gives lower limits
  # d$yhi gives upper limits
  require(ggplot2)
  p <- ggplot(d, aes(y=symLabels, x=estimate, xmin=conf.low, xmax=conf.high)) +
    geom_point(size = 2.5) +
    geom_errorbarh(height = 0) +
    scale_x_continuous(breaks = c(-0.5, -0.4, -0.3, -0.2, -0.1, 0, 0.1, 0.2, 0.3, 0.4, 0.5), limits = c(-0.59, 0.59)) +
    xlab('Correlation with Left-Right Self-Identification') + 
    ylab('') +
    facet_grid(issueType ~ ., scales = "free", space = "free") +
    theme_bw() + 
    theme(strip.text.y = element_text(angle = 0), text = element_text(size=12))
  return(p)
}

pdf("1993_L-R_correlations_sym.pdf", width = 8.5, height = 6)
credplot.gg(orderedCorrs.sym)
dev.off()

### Equivalence Testing ##
## Create list of partisan and symbolic variables

correlations.sym90 <- Reduce(rbind, lapply(symbolicList, function(x){
  tidy(cor.test(data$own.ideology, data[[x]], method = "pearson", conf.level = 0.9))[c("estimate", "conf.low", "conf.high", "statistic", "parameter")]
}))

# Create vector of longer question labels


symLabels <- c("Communist Party Membership", "CCP Left-Right Placement", "KMT Left-Right Placement", "Gang of Four should be thoroughly abolished", "Like the United States", "Like Japan")


# Create vector of issue types

issueType <- c("1993\nSurvey","1993\nSurvey","1993\nSurvey","1993\nSurvey","1993\nSurvey","1993\nSurvey")

bivariateCorrs.sym90 <- cbind(symbolicList, correlations.sym90, symLabels, issueType)


# ordered by estimate

orderedCorrs.sym90 <- bivariateCorrs.sym90[order(bivariateCorrs.sym90$estimate), ]

# rename columns of names to orderedPredictors

names(orderedCorrs.sym90)[names(orderedCorrs.sym90) == 'symbolicList'] <- 'orderedPredictors'

# make the symLabels an ordered factor for ggplot

orderedCorrs.sym90$symLabels <- factor(orderedCorrs.sym90$symLabels, levels = orderedCorrs.sym90$symLabels[order(orderedCorrs.sym90$estimate)])

# Save our file so we can combine later
save(orderedCorrs.sym90, file = "symCorrs90.Rda")

credplot.gg <- function(d1, d2){
  # d is a data frame with 4 columns
  # d$x gives variable names
  # d$y gives center point
  # d$ylo gives lower limits
  # d$yhi gives upper limits
  require(ggplot2)
  p <- ggplot(d1, aes(y=symLabels, x=estimate, xmin=conf.low, xmax=conf.high)) +
    geom_point(size = 2.5) +
    geom_errorbarh(height = 0, size = 1) +
    geom_errorbarh(height = 0, data = d2, aes(xmin=conf.low, xmax=conf.high)) +
    geom_vline(xintercept = 0, linetype=5) +
    geom_vline(xintercept = -0.13, linetype=3) +
    geom_vline(xintercept = 0.13, linetype=3) +
    scale_x_continuous(breaks = c(-0.5, -0.4, -0.3, -0.2, -0.1, 0, 0.1, 0.2, 0.3, 0.4, 0.5), limits = c(-0.59, 0.59)) +
    xlab('Correlation with Left-Right Self-Identification') + 
    ylab('') +
    facet_grid(issueType ~ ., scales = "free", space = "free") +
    theme_bw() + 
    theme(strip.text.y = element_text(angle = 0), text = element_text(size=12))
  return(p)
}

## Top panel of Figure 8
pdf("1993_L-R_correlations_sym90.pdf", width = 8.5, height = 6)
credplot.gg(orderedCorrs.sym90, orderedCorrs.sym)
dev.off()

################################################################################################
################################## Aldrich-McKelvey Scaling  ###################################
################################################################################################

library(tidyverse)
library(klaR)
set.seed(101)

library(MASS)
library(foreign)
library(basicspace)

positions <- cbind(reduced_data$own.ideology, reduced_data$ccp.ideology, reduced_data$kmt.ideology)

#### Only accept complete data - change positions to NA if not rate all parties - can change back if desired

incomplete.positions <- !complete.cases(positions)
positions[incomplete.positions, ] = NA

####


mode(positions) <- "double"
colnames(positions) <- c("Self", "CCP", "KMT")

result <- aldmck(positions, polarity = 2, respondent = 1, verbose = TRUE)
summary(result)

summary(result$respondents$intercept)

voters <- na.omit(result$respondents)

# plot(density(voters[,3]), main="Population Distribution", xlab="Left - Right", xlim=c(-2.2,2.2), lwd=2)
# parties <- na.omit(result$stimuli)
# party.names <- colnames(positions)[-1]
# total.n <- nrow(voters) + length(parties)
# cols <- c("gray33", "gray67")
# 
# pdf("1993_ald_mck_plot.pdf", width = 8.5, height = 6)
# plot(density(voters[,3]), main="Distribution of Ideology in China", xlab="Left - Right", xlim=c(-2.2,2.2), lwd=2)
# points(parties, rep(0, length(parties)), pch=rep(15:18, 2), col=rep(cols, each=4), cex=1.5)
# legend("topleft", c(party.names, paste("N = ", total.n, sep="")), pch=c(rep(15:18, 2), NA), col=c(rep(cols, each=4), NA), pt.cex=1.5, inset=.01, bty="n")
# dev.off()


### Basic Space scaling for individuals

# symbolicList.2 <- names(data[,505, 511, 512])

all.positions <- cbind(reduced_data[,c(1:15)], data[,505])
# mode(all.positions) <- "double"
colnames(all.positions) <- c(predictorsList[1:15], "abolish.gang")

basic.space.result <- blackbox(all.positions, dims=1, minscale=1, verbose=T)

summary(basic.space.result)


x.1 <- basic.space.result$individuals
# xx <- basic.space.result$individuals[[2]][,1]
# yy <- basic.space.result$individuals[[2]][,2]
# party <- reduced_data$ccp
# 
# plot(xx, yy, main="Basic Space Coordinates", xlab="First Dimension", ylab = "Second Dimension", asp=1, type = "n")
# points(xx[party==1], yy[party==1], pch="C", col="gray67", font=1)
# points(xx[party==0], yy[party==0], pch="N", col="gray33", font=1)
# 
# names(basic.space.result)

### Blackbox Transpose Scaling
# 
# selected <- sample(1:3287, 1500, replace = FALSE)
# 
# result.transpose <- blackbox_transpose(positions[selected,], dims = 2, minscale = 3, verbose = TRUE)
# 
# summary(result.transpose)

basic.x1 <- unlist(as.vector(x.1))
# basic.x <- as.vector(xx)
# basic.y <- as.vector(yy)

### Adding our scales 

# reduced_data <- cbind(reduced_data, basic.x, basic.y)
reduced_data <- cbind(reduced_data, basic.x1)
## Add in Aldrich-McKelvey Result
a.m.ideal <- as.vector(result$respondents$idealpt)
alpha_resp <- as.vector(result$respondents$intercept)
beta_resp <- as.vector(result$respondents$weight)
reduced_data <- cbind(reduced_data, a.m.ideal, alpha_resp, beta_resp)



################################################################################################
############################## Bayesian Aldrich-McKelvey Scaling  ##############################
################################################################################################

set.seed(132)

library(rjags)
library(foreign)
library(coda)


min.positions <- cbind(reduced_data$ccp.ideology, reduced_data$kmt.ideology)

### Make cutoff such that they have to place something to be estimated

cutoff <- 2

own.ideology <- reduced_data$own.ideology[rowSums(!is.na(min.positions)) >= cutoff]

party.members <- reduced_data$ccp[rowSums(!is.na(min.positions)) >= cutoff]

min.positions.TT <- min.positions[rowSums(!is.na(min.positions)) >= cutoff,]

N <- nrow(min.positions.TT)
q <- ncol(min.positions.TT)
z <- min.positions.TT
#
alpha.starts <- runif(N,-2,2)
beta.starts <- runif(N,-2,2)
#
zhat.starts <- runif(q,-1,1)
zhat.starts[1] <- -1 + runif(1,-0.099,0.099)
zhat.starts[2] <- 1 + runif(1,-0.099,0.099)
#
inits <- function() {list (zhat=zhat.starts, a=alpha.starts, b=beta.starts)}
#

cat('
    # BUGS/JAGS Code to generate DIF-corrected estimates of stimuli positions 
    # DIF-corrected respondent positions can be derived post-estimation using 
    # the a and b parameters and the reported self-placements.
    # z[i,j] is reported respondent i placement of stim j (self-placements have been removed)
    # N is number of rows
    # q is number of columns
    # zhat[j] is true position of stimulus j
    # a[i] and b[i] are individual linear mapping params
    
    model{
    for(i in 1:N){   ##loop through respondents
    for(j in 1:q){ ##loop through stimuli
    z[i,j] ~ dnorm(mu[i,j], tau[i,j])
    mu[i,j] <- a[i] + b[i]*zhat[j]
    tau[i,j] <- tauj[j] * taui[i]
    }	
    }
    
    ##priors on a and b
    for(i in 1:N){
    a[i] ~ dunif(-100, 100)
    b[i] ~ dunif(-100, 100)
    }
    
    ##priors on variance
    for(j in 1:q){
    tauj[j] ~ dgamma(.1,.1)
    }
    for(i in 1:N){
    taui[i] ~ dgamma(ga,gb)
    }
    ga ~ dgamma(.1,.1)
    gb ~ dgamma(.1,.1)
    
    ##priors on zhat
    zhat[1] ~ dnorm(0,1)T(-1.1,-0.9)
    zhat[2] ~ dnorm(0,1)T(0.9,1.1)
    for (j in 3:q){
    zhat[j] ~ dnorm(0,1)
    }
    
    sigmai.sq <- 1/taui
    sigmaj.sq <- 1/tauj
    
    }', file={f <- tempfile()})

# IN REALITY THIS CODE IS RUN BUT IT TAKES A LONG TIME

# BAM.sim <- jags.model(f,
#                       data = list('z' = z,'q' = q,'N' = N),inits = inits, n.chains = 2, n.adapt = 10000) 
# update(BAM.sim, n.iter = 10000)
# 
# 
# params <- c("zhat", "a", "b", "tauj", "sigmaj.sq")
# samps <- coda.samples(BAM.sim, params, n.iter=2500, thin=5)


# #  DIAGNOSTICS
# #
# plot(zhat[,1:2])
# geweke.diag(zhat)
# gelman.diag(zhat)
# #
# #

## Save to disk

# save(samps, file = "~/Dropbox/Dissertation/Ideological_Labels/1993_samps.Rda")

### Need to define vector of own ideology labels

self.1993 <- own.ideology


#### Looking at zhat
load("~/Dropbox/Dissertation/Ideological_Labels/1993_samps.Rda")

#### 

zhat.1993 <- summary(samps[,grep("zhat", colnames(samps[[1]]))])
sigmaj.sq.1993 <- summary(samps[,grep("sigmaj.sq", colnames(samps[[1]]))])

a.1993 <- samps[,grep(pattern="a[", colnames(samps[[1]]), fixed=TRUE),]
b.1993 <- samps[,grep(pattern="b[", colnames(samps[[1]]), fixed=TRUE),]
a_point_est.1993 <- summary(a.1993)[[1]][,1]
b_point_est.1993 <- summary(b.1993)[[1]][,1]
all.a.1993 <- do.call("rbind", a.1993)
all.b.1993 <- do.call("rbind", b.1993)
#
nsamp.1993 <- nrow(all.a.1993)
nresp.1993 <- ncol(all.a.1993)
#
idealpt.1993 <- rep(NA, nsamp.1993*nresp.1993)
dim(idealpt.1993) <- c(nsamp.1993,nresp.1993)
for (i in 1:nresp.1993){
  for (j in 1:nsamp.1993){
    idealpt.1993[j,i] <- ((self.1993[i] - all.a.1993[j,i]) / all.b.1993[j,i])
  }}
#
idealpt.percentiles.1993 <- rep(NA,nresp.1993*3)
dim(idealpt.percentiles.1993) <- c(nresp.1993,3)
colnames(idealpt.percentiles.1993) <- c("5%","50%","95%")
for (i in 1:nresp.1993){
  idealpt.percentiles.1993[i,] <- quantile(idealpt.1993[,i],probs=c(0.05,0.5,0.95),na.rm=TRUE)
}
idealpt.estimates.1993 <- idealpt.percentiles.1993[,2]



#  DENSITY PLOT OF THE ORIGINAL AND RE-SCALED IDEAL POINT ESTIMATES
#
#   %%1993%%
all.1993 <- na.omit(idealpt.estimates.1993)
positive.weights.1993 <- na.omit(idealpt.estimates.1993[b_point_est.1993 > 0])
#
all.Share.1993 <- length(all.1993)/(length(all.1993)+length(positive.weights.1993))
positive.weights.Share.1993 <- length(positive.weights.1993)/(length(all.1993)+length(positive.weights.1993))
#
all.dens.1993 <- density(all.1993)
all.dens.1993$y <- all.dens.1993$y*all.Share.1993
#
positive.weights.dens.1993 <- density(positive.weights.1993)
positive.weights.dens.1993$y <- positive.weights.dens.1993$y*positive.weights.Share.1993


### Figure 4: Histogram of BAM Ideal Point Estimates

pdf("~/Dropbox/Dissertation/Ideological_Labels/Replication/1993_raw_corrected_left_right_hist_only.pdf", width=4, height=4)
all.1993.HIST <- all.1993
all.1993.HIST[all.1993.HIST < -3] <- -3
all.1993.HIST[all.1993.HIST > 3] <- 3
#
pos.1993.HIST <- positive.weights.1993
pos.1993.HIST[pos.1993.HIST < -3] <- -3
pos.1993.HIST[pos.1993.HIST > 3] <- 3
#
tmp.2 <- hist(all.1993.HIST, breaks=seq(-3,3,by=0.25),
              xlab="",ylab="",main="",
              right=FALSE, freq=TRUE, col="gray75", ylim=c(-30,500))
tmp.3 <- hist(pos.1993.HIST, breaks=seq(-3,3,by=0.25),
              right=FALSE,freq=TRUE, col="gray50", add=TRUE)
text(zhat.1993[[1]][1,1],-27,"CCP",col="red",font=2,cex=1.2)
text(zhat.1993[[1]][2,1],-27,"KMT",col="blue",font=2,cex=1.2)
#
# Main title
mtext("BAM Ideal Point Estimates",side=3,line=1.50,cex=1.2,font=2)
# x-axis title
mtext("Left-Right",side=1,line=3.25,cex=1.1)
# y-axis title
mtext("Frequency",side=2,line=2.75,cex=1.1)
dev.off()

#
#
#  BOX-AND-WHISKER PLOTS OF ALPHA VALUES BY IDEOLOGICAL
#  SELF-PLACEMENTS AND PARTY IDENTIFICATION


### Figure 3b
pdf("~/Dropbox/Dissertation/Ideological_Labels/Replication/1993_avalues.pdf", width=8, height=4)
avalues <- summary(a.1993)[[1]][,1]
#
alpha.plot <- bwplot(avalues ~ self.1993, horizontal=FALSE,
                     main=list(label="Ideological Self-Placement and Alpha (Shift) Term",cex=1.4),
                     xlab=list(label="Self-Placement (Left to Right)",cex=1.3),
                     ylab=list(label="Alpha Posterior Mean",cex=1.3),
                     scales=list(cex=1.2)
)
#


update(alpha.plot, panel = function(...) {
  panel.abline(h=0, lwd=2, lty=1, col="light grey")
  panel.bwplot(...)
})
dev.off()
#



### Figure 3c
pdf("~/Dropbox/Dissertation/Ideological_Labels/Replication/1993_bvalues.pdf", width=8, height=4)
bvalues <- summary(b.1993)[[1]][,1]
#
beta.plot <- bwplot(bvalues ~ self.1993, horizontal=FALSE,
                    main=list(label="Ideological Self-Placement and Beta (Stretch) Term",cex=1.4),
                    xlab=list(label="Self-Placement (Left to Right)",cex=1.3),
                    ylab=list(label="Beta Posterior Mean",cex=1.3),
                    scales=list(cex=1.2)
)

update(beta.plot, panel = function(...) {
  panel.abline(h=0, lwd=2, lty=1, col="light grey")
  panel.bwplot(...)
})
dev.off()





#  BOX-AND-WHISKER PLOTS OF BAM VALUES BY IDEOLOGICAL
#  SELF-PLACEMENTS 

## Figure 3a
pdf("~/Dropbox/Dissertation/Ideological_Labels/Replication/1993_bamvalues.pdf", width=8, height=4)
# need to make sure the two vectors are of equal lengths
self.1993.complete <- na.omit(self.1993)
bam.plot <- bwplot(all.1993.HIST ~ self.1993.complete, horizontal=FALSE,
                   main=list(label="Ideological Self-Placement and BAM Scores",cex=1.4),
                   xlab=list(label="Self-Placement (Left to Right)",cex=1.3),
                   ylab=list(label="Bayesian Aldrich-McKelvey Scores",cex=1.3),
                   scales=list(cex=1.2)
)
# Note that BAM scores in excess of +3 or -3 are set to those values.
# Note that warnings are a function of the repetition in self-placements. 
# Can be eliminated by increasing the span of panel.loess
update(bam.plot, panel = function(...) {
  panel.abline(h=0, lwd=2, lty=1, col="light grey")
  # panel.loess(self.1993.complete, all.1993.HIST, span = 2/3, col = "dark grey")
  panel.bwplot(...)
})
dev.off()


################################################################################################
################ Figure 5: Equivalence Testing using 90% Confidence Intervals  #################
################################################################################################


correlations90 <- Reduce(rbind, lapply(predictorsList, function(x){
  tidy(cor.test(data$own.ideology, data[[x]], method = "pearson", conf.level = 0.9))[c("estimate", "conf.low", "conf.high", "statistic", "parameter")]
}))

# Create vector of longer question labels

# questionLabels <- c("Too many political parties\nproduce chaos", "Government should control\nspread of information", "Broadening democracy will\naffect stability", "People should unconditionally\nsupport the government", "Our government fits\nChina's circumstances", "The government is like a \nfamily, should be obeyed", "China's political system is\nthe best in the world", "The pace of political reform\nis too fast", "The pace of social change\nis too fast", "Ethnic minorities should not\nask for independence", "Government should restrict\nindividual income", "The development of private\nenterprise should be restricted", "China should not carry out\npolitical reform", "Use force if Taiwan\ndeclares independence", "Can leave all decisions\nto morally upright leaders")

questionLabels <- c("Too many political parties produce chaos", "Government should control spread of information", "Broadening democracy will affect stability", "Should unconditionally support the government", "Our government fits China's circumstances", "The government is like a family, should be obeyed", "China's political system is the best in the world", "The pace of political reform is too fast", "The pace of social change is too fast", "Ethnic minorities should not ask for independence", "Government should restrict individual income", "Restrict development of private enterprise", "China should not carry out political reform", "Use force if Taiwan declares independence", "Can leave all decisions to morally upright leaders")


# Create vector of issue types

issueType <- c("Political\nIssues", "Political\nIssues", "Political\nIssues","Political\nIssues","Political\nIssues","Political\nIssues","Political\nIssues","Political\nIssues","Social\nIssues","Political\nIssues","Economic\nIssues","Economic\nIssues","Political\nIssues","Political\nIssues","Social\nIssues")

bivariateCorrs90 <- cbind(predictorsList, correlations90, questionLabels, issueType)


# ordered by estimate

orderedCorrs90 <- bivariateCorrs90[order(bivariateCorrs90$estimate), ]

# rename columns of names to orderedPredictors

names(orderedCorrs90)[names(orderedCorrs90) == 'predictorsList'] <- 'orderedPredictors'

# make the questionLabels an ordered factor for ggplot

orderedCorrs90$questionLabels <- factor(orderedCorrs90$questionLabels, levels = orderedCorrs90$questionLabels[order(orderedCorrs90$estimate)])

### Modified 90 and 95% confidence intervals - twist on credplot.gg
credplot.gg <- function(d1, d2){
  # d is a data frame with 4 columns
  # d$x gives variable names
  # d$y gives center point
  # d$ylo gives lower limits
  # d$yhi gives upper limits
  require(ggplot2)
  p <- ggplot(d1, aes(y=questionLabels, x=estimate, xmin=conf.low, xmax=conf.high)) +
    geom_point(size = 2.5) +
    geom_errorbarh(height = 0, size = 1) +
    geom_errorbarh(height = 0, data = d2, aes(xmin=conf.low, xmax=conf.high)) +
    geom_vline(xintercept = 0, linetype=5) +
    geom_vline(xintercept = -0.13, linetype=3) +
    geom_vline(xintercept = 0.13, linetype=3) +
    scale_x_continuous(breaks = c(-0.15, -0.1, -0.05, 0, 0.05, 0.1, 0.15), limits = c(-0.16, 0.16)) +
    xlab('Correlation with Left-Right Self-Identification') + 
    ylab('') +
    facet_grid(issueType ~ ., scales = "free", space = "free") +
    theme_bw() + 
    theme(strip.text.y = element_text(angle = 0), text = element_text(size=12))
  return(p)
}

## Figure 5

pdf("1993_L-R_Correlations90.pdf", width = 8.5, height = 6)
credplot.gg(orderedCorrs90, orderedCorrs)
dev.off()


################################################################################################
#################### Correlation Analysis of Aldrich McKelvey Scores  #############################
################################################################################################

data_am <- cbind(data, a.m.ideal)

correlations_am <- Reduce(rbind, lapply(predictorsList, function(x){
  tidy(cor.test(data_am$a.m.ideal, data_am[[x]], method = "pearson"))[c("estimate", "conf.low", "conf.high", "statistic", "parameter")]
}))

# Create vector of longer question labels

# questionLabels <- c("Too many political parties\nproduce chaos", "Government should control\nspread of information", "Broadening democracy will\naffect stability", "People should unconditionally\nsupport the government", "Our government fits\nChina's circumstances", "The government is like a \nfamily, should be obeyed", "China's political system is\nthe best in the world", "The pace of political reform\nis too fast", "The pace of social change\nis too fast", "Ethnic minorities should not\nask for independence", "Government should restrict\nindividual income", "The development of private\nenterprise should be restricted", "China should not carry out\npolitical reform", "Use force if Taiwan\ndeclares independence", "Can leave all decisions\nto morally upright leaders")

questionLabels <- c("Too many political parties produce chaos", "Government should control spread of information", "Broadening democracy will affect stability", "Should unconditionally support the government", "Our government fits China's circumstances", "The government is like a family, should be obeyed", "China's political system is the best in the world", "The pace of political reform is too fast", "The pace of social change is too fast", "Ethnic minorities should not ask for independence", "Government should restrict individual income", "Restrict development of private enterprise", "China should not carry out political reform", "Use force if Taiwan declares independence", "Can leave all decisions to morally upright leaders")


# Create vector of issue types

issueType <- c("Political\nIssues", "Political\nIssues", "Political\nIssues","Political\nIssues","Political\nIssues","Political\nIssues","Political\nIssues","Political\nIssues","Social\nIssues","Political\nIssues","Economic\nIssues","Economic\nIssues","Political\nIssues","Political\nIssues","Social\nIssues")

bivariateCorrs_am <- cbind(predictorsList, correlations_am, questionLabels, issueType)


# ordered by estimate

orderedCorrs_am <- bivariateCorrs_am[order(bivariateCorrs_am$estimate), ]

# rename columns of names to orderedPredictors

names(orderedCorrs_am)[names(orderedCorrs_am) == 'predictorsList'] <- 'orderedPredictors_am'

# make the questionLabels an ordered factor for ggplot

orderedCorrs_am$questionLabels <- factor(orderedCorrs_am$questionLabels, levels = orderedCorrs_am$questionLabels[order(orderedCorrs_am$estimate)])


credplot.gg <- function(d){
  # d is a data frame with 4 columns
  # d$x gives variable names
  # d$y gives center point
  # d$ylo gives lower limits
  # d$yhi gives upper limits
  require(ggplot2)
  p <- ggplot(d, aes(y=questionLabels, x=estimate, xmin=conf.low, xmax=conf.high)) +
    geom_point(size = 2.5) +
    geom_errorbarh(height = 0) +
    geom_vline(xintercept = 0, linetype=2) +
    scale_x_continuous(breaks = c(-0.15, -0.1, -0.05, 0, 0.05, 0.1), limits = c(-0.157, 0.131)) +
    xlab('Correlation with Aldrich-McKelvey Scores') + 
    ylab('') +
    facet_grid(issueType ~ ., scales = "free", space = "free") +
    theme_bw() + 
    theme(strip.text.y = element_text(angle = 0), text = element_text(size=12))
  return(p)
}

pdf("1993_AM_Correlations.pdf", width = 8.5, height = 6)
credplot.gg(orderedCorrs_am)
dev.off()



################################################################################################
########### Figure 2 - Double Box Plot of perceived party positions  ###########################
################################################################################################

### Double Boxplot
library(reshape2)

double_data <- subset(reduced_data, !is.na(own.ideology))
double_data <- double_data[,c("own.ideology", "ccp.ideology", "kmt.ideology")]
names(double_data) <- c("own.ideology", "CCP", "KMT")


double_data_m <- melt(double_data, variable.name = "Party", value.name = "Ideology", id.vars = "own.ideology")


## Figure 2 
pdf("1993doubleboxplot.pdf", width = 8.5, height = 7)
ggplot(data=double_data_m, aes(x=as.factor(own.ideology), y=Ideology, color=Party, fill=Party)) +
  geom_boxplot(alpha=0.7, outlier.shape = NA) + 
  scale_y_continuous(breaks = 1:6) +
  scale_fill_manual(values=c("firebrick", "cornflowerblue")) +
  scale_color_manual(values=c("firebrick", "cornflowerblue")) +
  xlab("Self-Reported Ideology\n(Left-Right)") + 
  ylab("Perceived Political Party Ideology\n(Left-Right)") +
  #   geom_jitter(alpha=0.2, size = .02, width = 0.4, height = 0.02) +
  coord_fixed() +
  theme_bw()
dev.off()



################################################################################################
################# Figure 7: Equivalence Analysis of Aldrich McKelvey Scores  ###################
################################################################################################

correlations_am90 <- Reduce(rbind, lapply(predictorsList, function(x){
  tidy(cor.test(data_am$a.m.ideal, data_am[[x]], method = "pearson", conf.level = 0.9))[c("estimate", "conf.low", "conf.high", "statistic", "parameter")]
}))

# Create vector of longer question labels

# questionLabels <- c("Too many political parties\nproduce chaos", "Government should control\nspread of information", "Broadening democracy will\naffect stability", "People should unconditionally\nsupport the government", "Our government fits\nChina's circumstances", "The government is like a \nfamily, should be obeyed", "China's political system is\nthe best in the world", "The pace of political reform\nis too fast", "The pace of social change\nis too fast", "Ethnic minorities should not\nask for independence", "Government should restrict\nindividual income", "The development of private\nenterprise should be restricted", "China should not carry out\npolitical reform", "Use force if Taiwan\ndeclares independence", "Can leave all decisions\nto morally upright leaders")

questionLabels <- c("Too many political parties produce chaos", "Government should control spread of information", "Broadening democracy will affect stability", "Should unconditionally support the government", "Our government fits China's circumstances", "The government is like a family, should be obeyed", "China's political system is the best in the world", "The pace of political reform is too fast", "The pace of social change is too fast", "Ethnic minorities should not ask for independence", "Government should restrict individual income", "Restrict development of private enterprise", "China should not carry out political reform", "Use force if Taiwan declares independence", "Can leave all decisions to morally upright leaders")


# Create vector of issue types

issueType <- c("Political\nIssues", "Political\nIssues", "Political\nIssues","Political\nIssues","Political\nIssues","Political\nIssues","Political\nIssues","Political\nIssues","Social\nIssues","Political\nIssues","Economic\nIssues","Economic\nIssues","Political\nIssues","Political\nIssues","Social\nIssues")

bivariateCorrs_am90 <- cbind(predictorsList, correlations_am90, questionLabels, issueType)


# ordered by estimate

orderedCorrs_am90 <- bivariateCorrs_am90[order(bivariateCorrs_am90$estimate), ]

# rename columns of names to orderedPredictors

names(orderedCorrs_am90)[names(orderedCorrs_am90) == 'predictorsList'] <- 'orderedPredictors_am'

# make the questionLabels an ordered factor for ggplot

orderedCorrs_am90$questionLabels <- factor(orderedCorrs_am90$questionLabels, levels = orderedCorrs_am90$questionLabels[order(orderedCorrs_am90$estimate)])


credplot.gg <- function(d1, d2){
  # d is a data frame with 4 columns
  # d$x gives variable names
  # d$y gives center point
  # d$ylo gives lower limits
  # d$yhi gives upper limits
  require(ggplot2)
  p <- ggplot(d1, aes(y=questionLabels, x=estimate, xmin=conf.low, xmax=conf.high)) +
    geom_point(size = 2.5) +
    geom_errorbarh(height = 0, size = 1) +
    geom_errorbarh(height = 0, data = d2, aes(xmin=conf.low, xmax=conf.high)) +
    geom_vline(xintercept = 0, linetype=5) +
    geom_vline(xintercept = -0.13, linetype=3) +
    geom_vline(xintercept = 0.13, linetype=3) +
    scale_x_continuous(breaks = c(-0.15, -0.1, -0.05, 0, 0.05, 0.10, 0.15), limits = c(-0.16, 0.16)) +
    xlab('Correlation with Aldrich-McKelvey Scores') + 
    ylab('') +
    facet_grid(issueType ~ ., scales = "free", space = "free") +
    theme_bw() + 
    theme(strip.text.y = element_text(angle = 0), text = element_text(size=12))
  return(p)
}

## Figure 7
pdf("1993_AM_Correlations90.pdf", width = 8.5, height = 6)
credplot.gg(orderedCorrs_am90, orderedCorrs_am)
dev.off()

###############################################################################################
########################## Symbolic Correlations with A-M Scores ##############################
###############################################################################################


## Create list of partisan and symbolic variables 

symbolicList_am <- names(data_am[,c(441, 487, 489, 505, 511, 512)])

correlations.sym_am <- Reduce(rbind, lapply(symbolicList_am, function(x){
  tidy(cor.test(data_am$a.m.ideal, data_am[[x]], method = "pearson"))[c("estimate", "conf.low", "conf.high", "statistic", "parameter")]
}))

# Create vector of longer question labels


symLabels_am <- c("Communist Party Membership", "CCP Left-Right Placement", "KMT Left-Right Placement", "Gang of Four should be thoroughly abolished", "Like the United States", "Like Japan")


# Create vector of issue types

issueType <- c("1993\nSurvey","1993\nSurvey","1993\nSurvey","1993\nSurvey","1993\nSurvey","1993\nSurvey")

bivariateCorrs.sym_am <- cbind(symbolicList_am, correlations.sym_am, symLabels_am, issueType)


# ordered by estimate

orderedCorrs.sym_am <- bivariateCorrs.sym_am[order(bivariateCorrs.sym_am$estimate), ]

# rename columns of names to orderedPredictors

names(orderedCorrs.sym_am)[names(orderedCorrs.sym_am) == 'symbolicList_am'] <- 'orderedPredictors_am'

# make the symLabels an ordered factor for ggplot

orderedCorrs.sym_am$symLabels_am <- factor(orderedCorrs.sym_am$symLabels_am, levels = orderedCorrs.sym_am$symLabels_am[order(orderedCorrs.sym_am$estimate)])

issueClass <- c("Symbolic", "Symbolic", "Symbolic", "Partisan", "Partisan", "Partisan")

orderedCorrs.sym_am <- cbind(orderedCorrs.sym_am, issueClass)

# Save our file so we can combine later
save(orderedCorrs.sym_am, file = "symCorrs_am.Rda")


credplot.gg <- function(d){
  # d is a data frame with 4 columns
  # d$x gives variable names
  # d$y gives center point
  # d$ylo gives lower limits
  # d$yhi gives upper limits
  require(ggplot2)
  p <- ggplot(d, aes(y=symLabels_am, x=estimate, xmin=conf.low, xmax=conf.high)) +
    geom_point(aes(shape = issueClass), size = 2.5) +
    geom_errorbarh(height = 0) +
    geom_vline(xintercept = 0, linetype=2) +
    scale_x_continuous(breaks = c(-0.5, -0.4, -0.3, -0.2, -0.1, 0, 0.1, 0.2, 0.3, 0.4, 0.5), limits = c(-0.59, 0.59)) +
    xlab('Correlation with Aldrich-McKelvey Scores') + 
    ylab('') +
    facet_grid(issueType ~ ., scales = "free", space = "free") +
    theme_bw() + 
    theme(strip.text.y = element_text(angle = 0), text = element_text(size=12), legend.position = 'none') + 
    scale_shape_manual(values = c(21, 16))
  return(p)
}



pdf("1993_L-R_correlations_sym_am.pdf", width = 8.5, height = 4)
credplot.gg(orderedCorrs.sym_am)
dev.off()

################################################################################################
############# Figure 9- Equivalence Analysis of Aldrich McKelvey Symbolic Scores  ##############
################################################################################################

correlations.sym_am90 <- Reduce(rbind, lapply(symbolicList_am, function(x){
  tidy(cor.test(data_am$a.m.ideal, data_am[[x]], method = "pearson", conf.level = 0.9))[c("estimate", "conf.low", "conf.high", "statistic", "parameter")]
}))

# Create vector of longer question labels


symLabels_am <- c("Communist Party Membership", "CCP Left-Right Placement", "KMT Left-Right Placement", "Gang of Four should be thoroughly abolished", "Like the United States", "Like Japan")


# Create vector of issue types

issueType <- c("1993\nSurvey","1993\nSurvey","1993\nSurvey","1993\nSurvey","1993\nSurvey","1993\nSurvey")

bivariateCorrs.sym_am90 <- cbind(symbolicList_am, correlations.sym_am90, symLabels_am, issueType)


# ordered by estimate

orderedCorrs.sym_am90 <- bivariateCorrs.sym_am90[order(bivariateCorrs.sym_am90$estimate), ]

# rename columns of names to orderedPredictors

names(orderedCorrs.sym_am90)[names(orderedCorrs.sym_am90) == 'symbolicList_am'] <- 'orderedPredictors_am'

# make the symLabels an ordered factor for ggplot

orderedCorrs.sym_am90$symLabels_am <- factor(orderedCorrs.sym_am90$symLabels_am, levels = orderedCorrs.sym_am90$symLabels_am[order(orderedCorrs.sym_am90$estimate)])

issueClass <- c("Symbolic", "Symbolic", "Symbolic", "Partisan", "Partisan", "Partisan")

orderedCorrs.sym_am90 <- cbind(orderedCorrs.sym_am90, issueClass)

# Save our file so we can combine later
save(orderedCorrs.sym_am90, file = "symCorrs_am90.Rda")


credplot.gg <- function(d1, d2){
  # d is a data frame with 4 columns
  # d$x gives variable names
  # d$y gives center point
  # d$ylo gives lower limits
  # d$yhi gives upper limits
  require(ggplot2)
  p <- ggplot(d1, aes(y=symLabels_am, x=estimate, xmin=conf.low, xmax=conf.high)) +
    geom_point(aes(shape = issueClass), size = 2.5) +
    geom_errorbarh(height = 0, size = 1) +
    geom_errorbarh(height = 0, data = d2, aes(xmin=conf.low, xmax=conf.high)) +
    geom_vline(xintercept = 0, linetype=5) +
    geom_vline(xintercept = -0.13, linetype=3) +
    geom_vline(xintercept = 0.13, linetype=3) +
    scale_x_continuous(breaks = c(-0.5, -0.4, -0.3, -0.2, -0.1, 0, 0.1, 0.2, 0.3, 0.4, 0.5), limits = c(-0.59, 0.59)) +
    xlab('Correlation with Aldrich-McKelvey Scores') + 
    ylab('') +
    facet_grid(issueType ~ ., scales = "free", space = "free") +
    theme_bw() + 
    theme(strip.text.y = element_text(angle = 0), text = element_text(size=12), legend.position = 'none') + 
    scale_shape_manual(values = c(21, 16))
  return(p)
}


## Figure 9
pdf("1993_L-R_correlations_sym_am90.pdf", width = 8.5, height = 4)
credplot.gg(orderedCorrs.sym_am90, orderedCorrs.sym_am)
dev.off()


################################################################################################
################### Parts of Appendix A.4 & A.5: Factor Analysis/PCA ###########################
################################################################################################

## Exploratory Factor Analysis

completeIndices <- complete.cases(data_am[,predictorsList])

fullQDataFrame <- data_am[completeIndices,predictorsList]

idealFrame <- data_am[completeIndices, c("own.ideology", "a.m.ideal")]

factanal(fullQDataFrame, 3, rotation = "promax")

library(lavaan)

## Confirmatory Factory Analysis

cfaModel1 <- 'Pol.Issues =~ pol.parties + info.control + democracy.unstable + uncond.support + pol.system + family.gov + best.system + anti.ethnic.indep + no.polreform + taiwan.force + delegate.worthy
Econ.Issues =~ restrict.income + restrict.enterprise
Change.Issues =~ pol.reform.too.fast + social.change.too.fast
'

### CFA Model for all citizens, unweighted data

cfaFit <- cfa(cfaModel1, data = fullQDataFrame)
## See fit statistics
summary(cfaFit, standardized = T, fit.measures = T, rsq = T)
## See loadings
inspect(cfaFit, what="std")






library(ade4)
library(factoextra)
library(magrittr)


out.dudi <- dudi.pca(fullQDataFrame, center = TRUE, scale = TRUE, scannf=FALSE, nf=25)

VarianceProp <- 100 * out.dudi$eig/sum(out.dudi$eig)

## Some descriptive statistics of results


## Appendix Figure A.4 a)
pdf("1993_pca_scree.pdf", width = 6, height = 4)
fviz_eig(out.dudi)
dev.off()


# screeplot(out.dudi, main = "Screeplot - Eigenvalues")

# fviz_pca_ind(out.dudi,
#              repel = TRUE     # Avoid text overlapping
# )

## Appendix Figure A.4 c)
pdf("1993_pca_vars.pdf", width = 9, height = 6)
fviz_pca_var(out.dudi, 
             axes = c(1,2),
             repel = TRUE     # Avoid text overlapping
)
dev.off()



idealPCAFrame <- cbind(idealFrame, out.dudi$li)

## Regression analysis of PCA dimensions vs. self-reported and A-M ideologies

summary(lm(own.ideology ~ Axis1, data = idealPCAFrame))
summary(lm(own.ideology ~ Axis2, data = idealPCAFrame))
summary(lm(own.ideology ~ Axis3, data = idealPCAFrame))
summary(lm(own.ideology ~ Axis4, data = idealPCAFrame))

summary(lm(a.m.ideal ~ Axis1, data = idealPCAFrame))
summary(lm(a.m.ideal ~ Axis2, data = idealPCAFrame))
summary(lm(a.m.ideal ~ Axis3, data = idealPCAFrame))
summary(lm(a.m.ideal ~ Axis4, data = idealPCAFrame))

library("ggpubr")

## Appendix Figure A.5 a)
pdf("1993_pca_s1.pdf", width = 6, height = 4)
ggscatter(idealPCAFrame, x = "own.ideology", y = "Axis1", 
          add = "reg.line", conf.int = TRUE, 
          cor.coef = TRUE, cor.method = "pearson",
          xlab = "Left-Right Self-Placement", ylab = "First Principal Component")
dev.off()


## Appendix Figure A.5 b)
pdf("1993_pca_s2.pdf", width = 6, height = 4)
ggscatter(idealPCAFrame, x = "own.ideology", y = "Axis2", 
          add = "reg.line", conf.int = TRUE, 
          cor.coef = TRUE, cor.method = "pearson",
          xlab = "Left-Right Self-Placement", ylab = "Second Principal Component")
dev.off()

## Appendix Figure A.5 c)

pdf("1993_pca_s1am.pdf", width = 6, height = 4)
ggscatter(idealPCAFrame, x = "a.m.ideal", y = "Axis1", 
          add = "reg.line", conf.int = TRUE, 
          cor.coef = TRUE, cor.method = "pearson",
          xlab = "Aldrich-McKelvey Scores", ylab = "First Principal Component")
dev.off()

## Appendix Figure A.5 d)

pdf("1993_pca_s2am.pdf", width = 6, height = 4)
ggscatter(idealPCAFrame, x = "a.m.ideal", y = "Axis2", 
          add = "reg.line", conf.int = TRUE, 
          cor.coef = TRUE, cor.method = "pearson",
          xlab = "Aldrich-McKelvey Scores", ylab = "Second Principal Component")
dev.off()

### 15 dimensions for 15 questions
Dimensions <- c(1:15)


### Generate PCA dataframe
PCAdata <- cbind(Dimensions, VarianceProp)

PCAdata <- data.frame(PCAdata)



pdf("VariancePlot.pdf", width = 6, height = 4)
overallVar <- ggplot(data = PCAdata, aes(x = Dimensions, y = VarianceProp)) + 
  geom_line() + geom_point(size = 3) + xlab("Dimension") + ylab("Proportion of Variance Explained") + 
  scale_x_continuous() + scale_y_continuous() + ggtitle("Dimensionality of Ideology Data\nAll Respondents") + theme_bw() +  theme(plot.title=element_text(size=11))
overallVar
dev.off()

